Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
# Prepare the sample metadata used for plotting.
# Every "_a" is stripped from Tube_code; the cleaned code is the key joined
# against the count-table column names below. newID is the x-axis label.
# sep = "_" gives "<Tube_code>_<individual>"; the earlier
# paste(a, "_", b) relied on the default sep = " " and produced "a _ b".
transplants_metadata <- sample_metadata %>%
  mutate(
    Tube_code = str_remove_all(Tube_code, "_a"),
    newID = paste(Tube_code, individual, sep = "_")
  )

# Long table with one row per genome/sample pair that has a non-zero
# relative abundance, annotated with genome and sample metadata.
merged_data <- genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  left_join(transplants_metadata, by = join_by(sample == Tube_code)) %>%
  filter(count > 0)

# Stacked barplot of relative abundance per sample, coloured by phylum.
ggplot(
  merged_data,
  # group = phylum keeps the stacking order of taxa identical across bars
  aes(x = newID, y = count, fill = phylum, group = phylum)
) +
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
  scale_fill_manual(values = phylum_colors) +
  # nested facets: time point, then type within each time point
  facet_nested(. ~ time_point + type, scales = "free") +
  guides(fill = guide_legend(ncol = 1)) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Sample") +
  theme(
    # size = 0 renders the x tick labels at zero size
    axis.text.x = element_text(angle = 45, hjust = 1, size = 0)
  )
5.1.1.1 Wild samples
# Stacked barplot restricted to the "0_Wild" time point, one facet per
# Population.
filter(merged_data, time_point == "0_Wild") %>%
  ggplot(
    # group = phylum keeps the stacking order of taxa identical across bars
    aes(x = newID, y = count, fill = phylum, group = phylum)
  ) +
  # stacked bars separated by thin white borders
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
  facet_nested(. ~ Population, scales = "free") +
  scale_fill_manual(values = phylum_colors) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Sample") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 0),
    strip.text.x = element_text(size = 12)
  )
5.1.2 Phylum relative abundances
# Relative abundance of every phylum in every sample: genome-level relative
# abundances are summed within each sample/phylum pair.
phylum_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # .groups = "drop" returns an ungrouped table and silences the
  # regrouping message; later steps set their own grouping
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of each phylum across samples, sorted from
# the most to the least abundant phylum.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
| phylum | mean | sd |
|---|---|---|
| p__Bacteroidota | 0.387533390 | 0.199408886 |
| p__Bacillota_A | 0.247736092 | 0.157285090 |
| p__Bacillota | 0.119551625 | 0.147933278 |
| p__Pseudomonadota | 0.094124112 | 0.158574923 |
| p__Campylobacterota | 0.054878320 | 0.094197988 |
| p__Verrucomicrobiota | 0.027807574 | 0.066974042 |
| p__Desulfobacterota | 0.023737705 | 0.036771812 |
| p__Chlamydiota | 0.010961883 | 0.060750276 |
| p__Fusobacteriota | 0.010557257 | 0.028638395 |
| p__Cyanobacteriota | 0.009276509 | 0.016635288 |
| p__Bacillota_C | 0.004811016 | 0.006724154 |
| p__Spirochaetota | 0.004098862 | 0.012506256 |
| p__Bacillota_B | 0.002556218 | 0.004994779 |
| p__Actinomycetota | 0.001281229 | 0.006458780 |
| p__Elusimicrobiota | 0.001088209 | 0.006278019 |
# Phylum names ordered from highest to lowest mean relative abundance.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)

# Jitter plot of per-sample relative abundance, one row per phylum.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  # reversed levels place the most abundant phylum at the top of the y axis
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  labs(y = "Phylum", x = "Relative abundance") +
  theme_minimal() +
  theme(legend.position = "none")
5.2 Taxonomy boxplot
5.2.1 Family
# Relative abundance of every family in every sample: genome-level relative
# abundances are summed within each sample/family pair.
family_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, family) %>%
  # .groups = "drop" returns an ungrouped table and silences the
  # regrouping message; later steps set their own grouping
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of each family across samples, sorted from
# the most to the least abundant family.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
| family | mean | sd |
|---|---|---|
| f__Bacteroidaceae | 2.260146e-01 | 0.1384706235 |
| f__Lachnospiraceae | 1.410833e-01 | 0.1062953893 |
| f__Tannerellaceae | 1.045659e-01 | 0.0799894745 |
| f__Helicobacteraceae | 5.448546e-02 | 0.0937279764 |
| f__Mycoplasmoidaceae | 3.756572e-02 | 0.0767893776 |
| f__Erysipelotrichaceae | 3.536287e-02 | 0.0452140267 |
| f__UBA3700 | 3.456595e-02 | 0.0565495010 |
| f__Marinifilaceae | 2.794365e-02 | 0.0272474083 |
| f__Rikenellaceae | 2.725202e-02 | 0.0471735191 |
| f__Enterobacteriaceae | 2.687327e-02 | 0.0929254305 |
| f__Coprobacillaceae | 2.627823e-02 | 0.0907456387 |
| f__ | 2.465083e-02 | 0.0781013121 |
| f__Desulfovibrionaceae | 2.373771e-02 | 0.0367718116 |
| f__DTU072 | 2.183007e-02 | 0.0377159876 |
| f__Ruminococcaceae | 1.832093e-02 | 0.0428115194 |
| f__Rhizobiaceae | 1.579679e-02 | 0.0779688169 |
| f__LL51 | 1.556592e-02 | 0.0616955422 |
| f__UBA3830 | 1.512118e-02 | 0.0441855701 |
| f__Akkermansiaceae | 1.224165e-02 | 0.0317653885 |
| f__Chlamydiaceae | 1.096188e-02 | 0.0607502761 |
| f__Fusobacteriaceae | 1.055726e-02 | 0.0286383947 |
| f__CAG-239 | 9.138683e-03 | 0.0152490206 |
| f__Enterococcaceae | 8.437601e-03 | 0.0473906561 |
| f__Gastranaerophilaceae | 7.848357e-03 | 0.0146292952 |
| f__Oscillospiraceae | 6.624721e-03 | 0.0075288565 |
| f__UBA1997 | 6.613196e-03 | 0.0315103296 |
| f__Streptococcaceae | 6.600789e-03 | 0.0348230465 |
| f__UBA1242 | 4.266475e-03 | 0.0147768596 |
| f__Brevinemataceae | 4.098862e-03 | 0.0125062564 |
| f__Acutalibacteraceae | 3.498766e-03 | 0.0111374416 |
| f__RUG11792 | 2.921450e-03 | 0.0255676374 |
| f__Clostridiaceae | 2.855351e-03 | 0.0174153876 |
| f__UBA660 | 2.600647e-03 | 0.0118148140 |
| f__Peptococcaceae | 2.556218e-03 | 0.0049947786 |
| f__Acidaminococcaceae | 1.980431e-03 | 0.0051045211 |
| f__CAG-508 | 1.874529e-03 | 0.0065256902 |
| f__MGBC116941 | 1.783700e-03 | 0.0077801927 |
| f__Moraxellaceae | 1.540093e-03 | 0.0099192011 |
| f__RUG14156 | 1.428152e-03 | 0.0045670616 |
| f__Staphylococcaceae | 1.411833e-03 | 0.0051727635 |
| f__Anaerovoracaceae | 1.410739e-03 | 0.0027876311 |
| f__Elusimicrobiaceae | 1.088209e-03 | 0.0062780187 |
| f__CAG-288 | 9.840222e-04 | 0.0061275213 |
| f__Anaerotignaceae | 9.320656e-04 | 0.0041174457 |
| f__CALVMC01 | 7.793540e-04 | 0.0044385554 |
| f__Eggerthellaceae | 6.643755e-04 | 0.0021620275 |
| f__Massilibacillaceae | 6.322621e-04 | 0.0016561037 |
| f__Mycobacteriaceae | 6.168531e-04 | 0.0061497354 |
| f__UBA1820 | 4.705627e-04 | 0.0013078764 |
| f__CAG-274 | 4.686117e-04 | 0.0022415212 |
| f__Arcobacteraceae | 3.928587e-04 | 0.0050156837 |
| f__Burkholderiaceae_C | 3.835606e-04 | 0.0048969735 |
| f__Muribaculaceae | 3.508548e-04 | 0.0009525792 |
| f__UBA932 | 3.295199e-04 | 0.0011408058 |
| f__Hepatoplasmataceae | 3.099135e-04 | 0.0039567109 |
| f__Rhodobacteraceae | 3.068016e-04 | 0.0039169801 |
| f__Weeksellaceae | 2.873650e-04 | 0.0032049404 |
| f__Eubacteriaceae | 1.707442e-04 | 0.0006844943 |
| f__Sphingobacteriaceae | 1.561202e-04 | 0.0012685229 |
| f__Devosiaceae | 1.544841e-04 | 0.0015368528 |
| f__Pumilibacteraceae | 1.324439e-04 | 0.0007783049 |
| f__WRAU01 | 9.956857e-05 | 0.0012712064 |
| f__Peptostreptococcaceae | 2.371535e-05 | 0.0003027773 |
# Family names ordered from highest to lowest mean relative abundance.
# The column used to be called `mean` while holding sum(relabun); it now
# holds the mean, matching how phylum_arrange is built. family_summary has
# one row per sample/family pair, so the resulting order is the same.
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(family)

# Jitter plot of per-sample relative abundance for the 20 top-ranked
# families, coloured by phylum and split into one panel per `type`.
family_summary %>%
  left_join(
    genome_metadata %>% select(family, phylum) %>% unique(),
    by = join_by(family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  # head() never pads with NA when fewer than 20 families exist
  filter(family %in% head(family_arrange, 20)) %>%
  # reversed levels place the top-ranked family at the top of the y axis
  mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # NOTE(review): the 8th colour is dropped by position; confirm this still
  # matches the phyla shown if phylum_colors or the data change
  scale_color_manual(values = phylum_colors[-8]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ type) +
  theme_minimal() +
  labs(y = "Family", x = "Relative abundance", color = "Phylum")
5.2.2 Genus
# Relative abundance of every genus in every sample: genome-level relative
# abundances are summed within each sample/genus pair. Rows whose genus is
# the bare prefix "g__" are removed.
genus_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, genus) %>%
  # .groups = "drop" returns an ungrouped table and silences the
  # regrouping message; later steps set their own grouping
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__")

# Mean and standard deviation of each genus across samples, sorted from
# the most to the least abundant genus.
genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
| genus | mean | sd |
|---|---|---|
| g__Bacteroides | 1.374441e-01 | 0.0923562611 |
| g__Parabacteroides | 9.843371e-02 | 0.0803813676 |
| g__Phocaeicola | 7.109518e-02 | 0.0799972428 |
| g__Helicobacter_J | 3.115738e-02 | 0.0603033642 |
| g__Mycoplasmoides | 3.115302e-02 | 0.0765633496 |
| g__Odoribacter | 2.605667e-02 | 0.0268723184 |
| g__Roseburia | 2.425344e-02 | 0.0567044550 |
| g__NHYM01 | 2.332808e-02 | 0.0810376677 |
| g__Alistipes | 2.224698e-02 | 0.0287419149 |
| g__Coprobacillus | 2.070109e-02 | 0.0894282233 |
| g__Agrobacterium | 1.579679e-02 | 0.0779688169 |
| g__Akkermansia | 1.224165e-02 | 0.0317653885 |
| g__Fusobacterium_A | 1.046073e-02 | 0.0286441117 |
| g__Kineothrix | 9.107908e-03 | 0.0416218616 |
| g__Proteus | 8.976683e-03 | 0.0694135711 |
| g__Dielma | 8.687357e-03 | 0.0090713197 |
| g__CAG-95 | 8.238073e-03 | 0.0207930753 |
| g__JAAYNV01 | 7.265789e-03 | 0.0179169564 |
| g__Desulfovibrio | 7.219276e-03 | 0.0214990147 |
| g__UBA866 | 7.016767e-03 | 0.0295145125 |
| g__Enterococcus | 6.966137e-03 | 0.0463712943 |
| g__Lactococcus | 6.600789e-03 | 0.0348230465 |
| g__Ureaplasma | 6.412700e-03 | 0.0139267552 |
| g__Parabacteroides_B | 6.132159e-03 | 0.0101543965 |
| g__Lacrimispora | 6.028411e-03 | 0.0098068179 |
| g__CALXRO01 | 5.977964e-03 | 0.0313982647 |
| g__Citrobacter | 5.896711e-03 | 0.0340533686 |
| g__NSJ-61 | 5.745781e-03 | 0.0202234473 |
| g__Breznakia | 5.530147e-03 | 0.0240721461 |
| g__Clostridium_AQ | 5.522246e-03 | 0.0123487838 |
| g__Bilophila | 5.044501e-03 | 0.0089558435 |
| g__Hungatella_A | 4.964136e-03 | 0.0096921078 |
| g__Escherichia | 4.342538e-03 | 0.0270859242 |
| g__Salmonella | 4.319018e-03 | 0.0148769561 |
| g__UMGS1251 | 4.312965e-03 | 0.0073601071 |
| g__MGBC136627 | 4.305492e-03 | 0.0164533523 |
| g__Hungatella | 4.150386e-03 | 0.0194068227 |
| g__Clostridium_Q | 4.146767e-03 | 0.0052575243 |
| g__Brevinema | 4.098862e-03 | 0.0125062564 |
| g__Thomasclavelia | 4.046233e-03 | 0.0110779301 |
| g__Scatousia | 3.752075e-03 | 0.0104403539 |
| g__Mailhella | 3.745039e-03 | 0.0104110785 |
| g__Copromonas | 3.643508e-03 | 0.0050495456 |
| g__Enterocloster | 3.613702e-03 | 0.0047492729 |
| g__Ventrimonas | 3.566172e-03 | 0.0071931788 |
| g__Fournierella | 3.313097e-03 | 0.0063192740 |
| g__Limenecus | 3.230504e-03 | 0.0066725343 |
| g__Mucinivorans | 3.006847e-03 | 0.0379999623 |
| g__Lawsonia | 2.916613e-03 | 0.0103686789 |
| g__MGBC133411 | 2.902785e-03 | 0.0074333461 |
| g__Caccovivens | 2.887473e-03 | 0.0112659902 |
| g__Sarcina | 2.855351e-03 | 0.0174153876 |
| g__Eisenbergiella | 2.796704e-03 | 0.0069384489 |
| g__Bacteroides_G | 2.781473e-03 | 0.0352463088 |
| g__CAJLXD01 | 2.730769e-03 | 0.0088951735 |
| g__Acetatifactor | 2.654208e-03 | 0.0055286194 |
| g__Blautia | 2.598789e-03 | 0.0062369300 |
| g__Velocimicrobium | 2.235984e-03 | 0.0067748392 |
| g__C-19 | 2.235603e-03 | 0.0048296119 |
| g__CAZU01 | 2.189719e-03 | 0.0066369837 |
| g__Negativibacillus | 2.145239e-03 | 0.0056002700 |
| g__Intestinimonas | 2.003816e-03 | 0.0035552824 |
| g__Rikenella | 1.998193e-03 | 0.0037323264 |
| g__Phascolarctobacterium | 1.980431e-03 | 0.0051045211 |
| g__Butyricimonas | 1.886974e-03 | 0.0042483569 |
| g__RGIG6463 | 1.855727e-03 | 0.0040258495 |
| g__MGBC116941 | 1.783700e-03 | 0.0077801927 |
| g__JALFVM01 | 1.712574e-03 | 0.0038669765 |
| g__Oscillibacter | 1.546231e-03 | 0.0025273862 |
| g__Acinetobacter | 1.540093e-03 | 0.0099192011 |
| g__Pseudoflavonifractor | 1.489286e-03 | 0.0027026675 |
| g__Citrobacter_A | 1.444197e-03 | 0.0061395639 |
| g__Staphylococcus | 1.411833e-03 | 0.0051727635 |
| g__14-2 | 1.228249e-03 | 0.0098038984 |
| g__RGIG4733 | 1.225656e-03 | 0.0038271024 |
| g__Beduini | 1.217163e-03 | 0.0025500013 |
| g__Scatocola | 1.162463e-03 | 0.0045748144 |
| g__Enterococcus_A | 1.123893e-03 | 0.0100947603 |
| g__UBA1436 | 1.088209e-03 | 0.0062780187 |
| g__Faecisoma | 1.057958e-03 | 0.0056043491 |
| g__RGIG9287 | 9.993630e-04 | 0.0094920364 |
| g__CAG-345 | 9.840222e-04 | 0.0061275213 |
| g__Lachnotalea | 9.593025e-04 | 0.0033990676 |
| g__Blautia_A | 9.546974e-04 | 0.0029560949 |
| g__Ruthenibacterium | 8.602962e-04 | 0.0024818365 |
| g__CAG-269 | 8.276280e-04 | 0.0048017072 |
| g__Marseille-P3106 | 8.230475e-04 | 0.0017874580 |
| g__WRHT01 | 6.666234e-04 | 0.0027445999 |
| g__Eggerthella | 6.643755e-04 | 0.0021620275 |
| g__CHH4-2 | 6.371240e-04 | 0.0020328940 |
| g__Corynebacterium | 6.168531e-04 | 0.0061497354 |
| g__Serratia_A | 6.076344e-04 | 0.0077577570 |
| g__Anaerotruncus | 6.058602e-04 | 0.0016447558 |
| g__RUG14156 | 5.735678e-04 | 0.0021869659 |
| g__RGIG1896 | 5.683407e-04 | 0.0051791669 |
| g__IOR16 | 5.574841e-04 | 0.0016418264 |
| g__Faecimonas | 5.146607e-04 | 0.0054508437 |
| g__CAG-56 | 5.096368e-04 | 0.0016613952 |
| g__MGBC140009 | 4.851579e-04 | 0.0024491799 |
| g__CALURL01 | 4.805911e-04 | 0.0017020401 |
| g__Merdimorpha | 4.705627e-04 | 0.0013078764 |
| g__RGIG8482 | 4.560993e-04 | 0.0030287706 |
| g__Enterobacter | 4.223379e-04 | 0.0042068345 |
| g__Klebsiella | 4.203682e-04 | 0.0049802041 |
| g__Caccenecus | 4.086273e-04 | 0.0018112589 |
| g__Aliarcobacter | 3.928587e-04 | 0.0050156837 |
| g__Scatenecus | 3.851876e-04 | 0.0018282510 |
| g__Alcaligenes | 3.835606e-04 | 0.0048969735 |
| g__Plesiomonas | 3.766988e-04 | 0.0027593254 |
| g__JAHHSE01 | 3.529590e-04 | 0.0014998851 |
| g__HGM05232 | 3.508548e-04 | 0.0009525792 |
| g__Enterococcus_B | 3.475714e-04 | 0.0022665993 |
| g__Egerieousia | 3.295199e-04 | 0.0011408058 |
| g__Stoquefichus | 3.137462e-04 | 0.0020871798 |
| g__Hepatoplasma | 3.099135e-04 | 0.0039567109 |
| g__Paracoccus | 3.068016e-04 | 0.0039169801 |
| g__Moheibacter | 2.873650e-04 | 0.0032049404 |
| g__Scatomorpha | 2.738230e-04 | 0.0010358302 |
| g__Emergencia | 2.601331e-04 | 0.0013298673 |
| g__UBA7185 | 2.523935e-04 | 0.0014817660 |
| g__Eubacterium | 1.707442e-04 | 0.0006844943 |
| g__Sphingobacterium | 1.561202e-04 | 0.0012685229 |
| g__Devosia | 1.544841e-04 | 0.0015368528 |
| g__Anaerosporobacter | 1.507638e-04 | 0.0012978048 |
| g__Caccomorpha | 1.434035e-04 | 0.0010730603 |
| g__UBA2658 | 1.355578e-04 | 0.0007332702 |
| g__Protoclostridium | 1.324439e-04 | 0.0007783049 |
| g__Angelakisella | 1.315171e-04 | 0.0009387427 |
| g__Cetobacterium_A | 9.652924e-05 | 0.0008876688 |
| g__Rahnella | 6.708891e-05 | 0.0008565338 |
| g__Peptostreptococcus | 2.371535e-05 | 0.0003027773 |
# Genus names (without the "g__" prefix) ordered from highest to lowest mean
# relative abundance. The column used to be called `mean` while holding
# sum(relabun); it now holds the mean, matching how phylum_arrange is built.
# genus_summary has one row per sample/genus pair, so the order is the same.
# genus_summary already excludes "g__", so that filter is not repeated here.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  mutate(genus = sub("^g__", "", genus)) %>%
  pull(genus)

# Jitter plot of per-sample relative abundance for the 20 top-ranked genera,
# coloured by phylum and split into one panel per `type`.
genus_summary %>%
  left_join(
    genome_metadata %>% select(genus, phylum) %>% unique(),
    by = join_by(genus)
  ) %>%
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  # strip the prefix after the join so names match genus_arrange
  mutate(genus = sub("^g__", "", genus)) %>%
  # head() never pads with NA when fewer than 20 genera exist
  filter(genus %in% head(genus_arrange, 20)) %>%
  # reversed levels place the top-ranked genus at the top of the y axis
  mutate(genus = factor(genus, levels = rev(head(genus_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  # NOTE(review): colours 3, 4, 6 and 8 are dropped by position; confirm this
  # still matches the phyla shown if phylum_colors or the data change
  scale_color_manual(values = phylum_colors[-c(3, 4, 6, 8)]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ type) +
  theme_minimal() +
  # the y axis shows genera; the label previously read "Family"
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")